__author__ = 'Sebastian Enger, M.Sc.'

import sys
import pprint
import nltk
from nltk.tokenize import sent_tokenize
#nltk.download()

# Python 2-only idiom: sys.setdefaultencoding() is removed from the sys
# namespace during interpreter start-up, so the module is reloaded to get the
# function back before calling it. The call makes UTF-8 (instead of ASCII) the
# codec used for implicit str <-> unicode conversions.
# The order of these two statements matters; neither exists in Python 3.
reload(sys)
sys.setdefaultencoding('utf8')

# Module-level pretty-printer that indents nested structures by 4 spaces.
# No active statement in the visible part of this script uses it; presumably
# it is kept for ad-hoc debugging output.
pp = pprint.PrettyPrinter(indent=4)




#delimiters      = ['\n', ' ', ',', '.', '?', '!', ':', ';', '\s', '\t', '\r']

filename        = sys.argv[-1]

# read file into string
text            = open(filename, 'r').read()
#text.decode('utf-8')

sent_tokenize_list = sent_tokenize(text)

for ele in sent_tokenize_list:
    word_list = ele.split()

    for word in word_list:
        if word.istitle():
            print "Word: ",word, " -> gross geschrieben\n"
    #pp.pprint(word_list)